/* This is the new merge program for Transunion data.
As of 3/6/13, it merges internally created roll-up variables with a few TU roll-up variables
for years 97-04
*/

libname in '/data';
libname in_2003 '/data';

/*1997*/
proc sort data=in.roll_trades97;
     by teditseq97;
run;

proc sort data=in.ids;
     by teditseq97;
run;

data rollup_merger_97;
     merge in.roll_trades97(in=in1) in.ids(keep=teditseq97 pid in=in2);
     by teditseq97;
     if in1 and in2;
     year=1997;
run;

proc sort data=rollup_merger_97;
     by pid;
run;

/*333 variables on this dataset*/

/*1999*/
proc sort data=in.roll_trades99;
     by teditseq99;
run;

proc sort data=in.ids;
     by teditseq99;
run;

data rollup_merger_99;
     merge in.roll_trades99(in=in1) in.ids(keep=teditseq99 pid: in=in2);
     by teditseq99;
     if in1 and in2;
     year=1999;
run;

proc sort data=rollup_merger_99;
     by pid;
run;

/*333 variables on this dataset*/

/*2001*/
proc sort data=in.roll_trades01;
     by teditseq01;
run;

proc sort data=in.ids;
     by teditseq01;
run;

data rollup_merger_01;
     merge in.roll_trades01(in=in1) in.ids(keep=teditseq01 pid in=in2);
     by teditseq01;
     if in1 and in2;
     year=2001;
run;

proc sort data=rollup_merger_01;
     by pid;
run;

/*333 variables on this dataset*/

/*2003*/
proc sort data=in.roll_trades03;
     by teditseq03;
run;

proc sort data=in.ids;
     by teditseq03;
run;

data rollup_merger_03;
     merge in.roll_trades03(in=in1) in.ids(keep=teditseq03 pid person in=in2);
     by teditseq03;
     *if in1 and in2;
     year=2003;
run;

proc sort data=rollup_merger_03;
     by pid;
run;

/*340 variables on this dataset --  has additional location vars of its own*/

/*2004*/
proc sort data=in.roll_trades04;
     by teditseq04;
run;

proc sort data=in.ids;
     by teditseq04;
run;

/*there are 6 observations in the roll_trades dataset but not in our ids dataset.
this is because 2003 is our base year, so these people exist in the 2004 survey but not 2003, so get dropped from ID's*/
data rollup_merger_04;
     merge in.roll_trades04(in=in1) in.ids(keep=teditseq04 pid in=in2);
     by teditseq04;
     if in1 and in2;
     year=2004;
run;

proc sort data=rollup_merger_04;
     by pid;
run;

/*329 variables on this dataset*/

/*2007*/
proc sort data=in.roll_trades07;
     by permid;
run;

proc sort data=in.ids;
     by permid;
run;

/*there are 105 observations in the roll_trades dataset but not in our ids dataset.
this is because 2003 is our base year, so these people exist in the 2007 survey but not 2003, so get dropped from ID's*/
data rollup_merger_07;
     merge in.roll_trades07(in=in1) in.ids(keep=permid pid in=in2);
     by permid;
     if in1 and in2;
     year=2007;
run;

proc sort data=rollup_merger_07;
     by pid;
run;

/*336 variables on this dataset*/

/*2008*/
proc sort data=in.roll_trades08;
     by permid;
run;

proc sort data=in.ids;
     by permid;
run;

/*there are 104 observations in the roll_trades dataset but not in our ids dataset.
this is because 2003 is our base year, so these people exist in the 2008 survey but not 2003, so get dropped from ID's*/
data rollup_merger_08;
     merge in.roll_trades08(in=in1) in.ids(keep=permid pid in=in2);
     by permid;
     if in1 and in2;
     year=2008;
run;

proc sort data=rollup_merger_08;
     by pid;
run;

/*343 variables on this dataset*/

/*2010*/
proc sort data=in.roll_trades10;
     by permid2008;
run;

proc sort data=in.ids;
     by permid2008;
run;

data rollup_merger_10;
     merge in.roll_trades10(in=in1) in.ids(keep=permid2008 pid in=in2);
     by permid2008;
     if in1 and in2;
     year=2010;
run;

proc sort data=rollup_merger_10;
     by pid;
run;

/*336 variables on this dataset*/
proc contents data=rollup_merger_10;
run;

data rollup_merged_97_10;
     retain pid year;
     set rollup_merger_97 rollup_merger_99 rollup_merger_01 rollup_merger_03 rollup_merger_04 rollup_merger_07 rollup_merger_08 rollup_merger_10;
run;

proc sort data=rollup_merged_97_10;
     by pid year;
run; 
proc contents data=rollup_merged_97_10;
run;


/* this code makes sure the teditseq/person/permid variables are carried across all years in the panel */
data temp97(keep=pid teditseq97);
     set rollup_merged_97_10;
     if year=1997;
run;

data temp99(keep=pid teditseq99);
     set rollup_merged_97_10;
     if year=1999;     
run;

data temp01(keep=pid teditseq01);
     set rollup_merged_97_10;
     if year=2001;     
run;

data temp03(keep=pid person);
     set rollup_merged_97_10;
     if year=2003;     
run;

data temp04(keep=pid teditseq04);
     set rollup_merged_97_10;
     if year=2004;     
run;

data temp07(keep=pid permid);
     set rollup_merged_97_10;
     if year=2007;     
run;

data temp08(keep=pid permid);
     set rollup_merged_97_10;
     if year=2008;     
run;

data temp10(keep=pid permid);
     set rollup_merged_97_10;
     if year=2010;     
run;


data rollup_merged_97_10;
     set rollup_merged_97_10(drop= teditseq: permid person);
run;

data in.rollup_merged_97_10;
     merge temp97 temp99 temp01 temp03 temp04 temp07 temp08 temp10 rollup_merged_97_10;
     by pid;
     teditseq03=pid;
run;

proc sort data=in.rollup_merged_97_10;
     by pid year;
run;

proc contents data=in.rollup_merged_97_10;
run;

proc print data=in.rollup_merged_97_10 (obs=100);
var year teditseq: permid person;
title 'identifier info';
run;

proc print data=in.rollup_merged_97_10 (obs=30);
title 'this is it';
run;

endsas;



